

libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';


*This program creaes Daily_Panel_RFS which is referenced in many subsequent codes;
*any dataset with the "mylib" prefix is described in the "readme" file in the repliaction package;
*The replication file also includes psuedo datasets. The pseudo data includes a random sample of roughly 1% of all observations. 
The structure of the pseudo data is kept in tack to preserve the dimensions of the data (e.g., the number of variables).  
Thus, the pseudo-data are useful for illustrating the format of the data, 
however running the code of the pseudo-data will not generate the same results as reported in the paper.    




*start with CRSP daily data;




data crsp;
set MYLIB.CRSP17_21D;
*let's limit to common stocks;
if 10<=shrcd<=11;
abn_ret = ret - vwretd; 
year = year(date);
if year >=2017;
month = month(date);
*create cum_month which tracks the numbre of months since December 2017;
cum_month = (year -2018)* 12 + month;
prc = abs(prc);
keep permno date ticker year month cum_month prc ABN_RET shrcd;
run;


*WILL NOW BRING IN MY DD POSTS AT A DAILY LEVEL;


DATA SAMPLE;
set mylib.dd_posts18_21q2;;
RUN;

*day0 =  first day an ivnestor could have traded on the DD post;
PROC SORT DATA=SAMPLE;
BY TICKER PERMNO DAY0;
RUN;
proc univariate noprint data=SAMPLE;
by TICKER PERMNO day0;
var purchase;
output out=dd_posts_daily sum=buy_dd_posts n=total_dd_posts;
run;;

data dd_posts_daily;
set dd_posts_daily;
sell_dd_posts =  total_dd_posts - buy_dd_posts;
NET_DD_POSTS = BUY_DD_POSTS - SELL_DD_POSTS;
run;


proc sql;
create table crsp3B as
select *
from CRSP, dd_posts_daily
where crsp.TICKER = dd_posts_daily.TICKER and crsp.date =dd_posts_daily.day0;
quit;

PROC SORT DATA=CRSP;
BY TICKER DATE;
RUN;
PROC SORT DATA=CRSP3B;
BY TICKER DATE;
RUN;
DATA CRSP3;
MERGE CRSP CRSP3B;
BY TICKER DATE;
RUN;



DATA RET;
SET CRSP3;
IF TICKER = '' THEN DELETE;
KEEP TICKER ABN_RET DATE;
RUN;

proc sort data=RET;
by TICKER descending date;
run;


*compute compound returns over various intervals;


%macro calcret;
data RET2;
set RET;
  by TICKER;

/* Take natural logs of the firm return (ret) and decile size return (decret) */

   lfret=log(abn_ret+1);


/* Define arrays to create the lagged values */

    array lagfret[252] lagfret1-lagfret252;


/* This is the macro portion: a "do-loop" to create the 59 necessary lags */

    %do j=1 %to 252;
       lagfret&j=lag&j(lfret);
    %end;

/* The following statements set to missing lagged values that */
/* are reading the data corresponding to the previous PERMNO  */

    if first.TICKER then count=1;
    do i=count to 252;
      lagfret[i] = .;

    end;
    count +1 ;

/* Calculate the cumulative returns by adding the logs of the returns*/

  


  ret63= exp( sum(of  lagfret1-lagfret63)) -1;
    ret21= exp( sum(of  lagfret1-lagfret21)) -1;
	  ret5 =exp( sum(of  lagfret1-lagfret5)) -1;
	  ret1 = exp(lagfret1)-1;
 
    week1= exp( sum(of  lagfret1-lagfret5)) -1;
  week2= exp( sum(of  lagfret6-lagfret10)) -1;
  week3= exp( sum(of  lagfret11-lagfret15)) -1;
  week4= exp( sum(of  lagfret16-lagfret20)) -1;

week5= exp( sum(of  lagfret21-lagfret25)) -1;
  week6= exp( sum(of  lagfret26-lagfret30)) -1;
  week7= exp( sum(of  lagfret31-lagfret35)) -1;
  week8= exp( sum(of  lagfret36-lagfret40)) -1;


  week9= exp( sum(of  lagfret41-lagfret45)) -1;
  week10= exp( sum(of  lagfret46-lagfret50)) -1;
  week11= exp( sum(of  lagfret51-lagfret55)) -1;
  week12= exp( sum(of  lagfret56-lagfret60)) -1;
  week13 = exp( sum(of lagfret66-lagfret70)) -1;


/* Drop unnecessary variables */

    drop l: count i;

%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

/* Restore chronological order */
proc sort data=RET2;
  by TICKER date;
run;



DATA CRSP3;
MERGE CRSP3 RET2;
BY TICKER DATE;
IF TICKER = 'GME' then gme_amc_flag =1;
IF TICKER = 'AMC' then gme_amc_flag =1;
IF GME_AMC_FLAG = . THEN GME_AMC_FLAG =0;
IF NET_DD_POSTS = . THEN NET_DD_POSTS =0;
IF NET_DD_POSTS >0 THEN NET_DD_SIGN =1;
IF NET_DD_POSTS =0 THEN NET_DD_SIGN =0;
IF . <NET_DD_POSTS <0 THEN NET_DD_SIGN =-1;
RUN;






*will now bring in ibes data;





data revisions;
set   mylib.ibes17_21;
ticker = oftic;
id = ticker||ANNDATS_ACT;
if anndats_act = . then delete;
drop ANNTIMS_ACT  MEASURE;
run;


proc sort data=revisions;
by fpi id analys   anndats;
run;

data revisions2;
set revisions;
old_value = lag(value);
lag_analys = lag(analys);
if analys ~= lag_analys then old_value = .;
revision = value - old_value;
lag_fpi = lag(fpi);
if analys ~= lag_analys then lag_fpi = .;
keep id ticker anndats_act analys value old_value revision lag_analys anndats fpi lag_fpi;
run;

data revisions2;
set revisions2;
if revision = . then delete;
if revision >0 then up =1;
if revision <0 then up =0;
if revision =0 then delete;

run;


proc sort data=revisions2;
by ticker anndats;
run;
proc univariate noprint data=revisions2;
by ticker anndats;
var up;
output out=qtr_revisions sum=up_revisionsQ N=total_revisionsQ;
where fpi = '6';
run;
proc univariate noprint data=revisions2;
by ticker anndats;
var up;
output out=annual_revisions sum=up_revisionsA N=total_revisionsA;
where fpi = '1';
run;

DATA ALL_REVISIONS;
MERGE QTR_REVISIONS ANNUAL_REVISIONS;
BY TICKER anndats;
IF UP_REVISIONSQ = . THEN UP_REVISIONSQ =0;
IF up_revisionsA = . THEN UP_REVISIONSA =0;
IF total_revisionsQ = . THEN total_revisionsQ =0;
IF total_revisionsA = . THEN total_revisionsA =0;

*will combine quarterly ann annual revisions;

up_revisions = up_revisionsQ + up_revisionsA;
total_revisions  = total_revisionsQ + total_revisionsA;
date = anndats;
if ticker = '' then delete;

keep ticker date up_revisions total_revisions;
RUN;





proc sort data=all_revisions;
by ticker date;
run;
proc sort data=crsp3;
by ticker date;
run;
data crsp3;
merge crsp3 all_revisions;
by ticker date;
if up_revisions = . then up_revisions =0;
if total_revisions = . then total_revisions =0;
if year = 2017 then delete;
if ret5 = . then delete;
run;




*compute revisions over various horizons;


data revisions;
set crsp3;
keep ticker date up_revisions total_revisions;
run;
proc sort data=revisions;
by ticker desending date;
run;



%macro calcret;
data revisions2;
set revisions;
  by ticker;



/* Define arrays to create the lagged values */

    array uprevision[63] uprevision1-uprevision63;
    array revision[63] revision1-revision63;

/* This is the macro portion: a "do-loop" to create the 59 necessary lags */

    %do j=1 %to 63;
      uprevision&j=lag&j(up_revisions);
        revision&j=lag&j(total_revisions);

    %end;

/* The following statements set to missing lagged values that */
/* are reading the data corresponding to the previous PERMNO  */

    if first.ticker then count=1;
    do i=count to 63;
      uprevision[i] = .;
      revision[i] = .;

    end;
    count +1 ;

/* Calculate the cumulative returns by adding the logs of the returns*/




  up_revision63= sum(of  uprevision1-uprevision63) ;
    up_revision21=  sum(of  uprevision1-uprevision21);
      up_revision5 = sum(of  uprevision1-uprevision5) ;

total_revision63= sum(of  revision1-revision63) ;
    total_revision21=  sum(of  revision1-revision21) ;
      total_revision5 = sum(of  revision1-revision5) ;




/* Drop unnecessary variables */

    drop uprevision1-uprevision63 revision1-revision63 count i;

%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

/* Restore chronological order */
proc sort data=revisions2;
  by ticker date;
run;

data revisions2b;
set revisions2;
if total_revision5 = 0 then total_revision5 =.;
if total_revision21 = 0 then total_revision21 =.;
if total_revision63 = 0 then total_revision63 =.;
percent_up5 = up_revision5/total_revision5;
percent_up21 = up_revision21/total_revision21;
percent_up63 = up_revision63/total_revision63;
run;




proc sort data=crsp3;
by ticker date;
run;
proc sort data=revisions2b;
by ticker date;
run;






data crsp_ibes;
merge crsp3 revisions2b;
by ticker date;


*drop unnecessary variables;
drop   up_revision5  total_revision5 up_revision21 total_revision21 up_revision63 total_revision63     ;
run;



data ibes6;
set  MYLIB.ibes17_21;
ticker = oftic;
where fpi = '6';
keep ticker anndats_act actual;
run;

proc sort nodupkey data=ibes6;
by ticker anndats_act;
run;

data ibes6;
set ibes6;
by ticker;
format l1_anndats_act YYMMDD10.;
l1_anndats_act = lag(anndats_act);
if first.ticker then l1_anndats_act = .;
YEAR = YEAR(anndats_act);
run;



DATA IBES_SAMPLE;
SET IBES6;
if ticker = '' then delete;
IF l1_anndats_act = . THEN DELETE;
IF ANNDATS_ACT = . THEN DELETE;
RUN;



data crsp2;
set crsp_ibes;
year = year(date);
if year >=2017;
month = month(date);
cum_month = (year -2018)* 12 + month;
keep ticker date year month cum_month ;
run;


PROC SQL;
CREATE TABLE CRSP2 AS
SELECT *
FROM crsp2, IBES_SAMPLE
WHERE crsp2.ticker = IBES_SAMPLE.ticker AND crsp2.DATE >= IBES_SAMPLE.L1_ANNDATS_ACT AND crsp2.DATE < IBES_SAMPLE.ANNDATS_ACT;
QUIT;


DATA CRSP2B;
SET CRSP2;
KEEP ticker DATE L1_ANNDATS_ACT;
RUN;



data ibes2;
set   mylib.ibes17_20_fpi6and1 mylib.ibes2021_june;
where fpi = '6';
ticker = oftic;
if ticker = '' then delete;
YEAR = YEAR(ANNDATS);
keep ticker  ANALYS  ANNDATS YEAR VALUE;
run;

PROC SQL;
CREATE TBALE IBES3 AS
SELECT *
FROM CRSP2B, IBES2
WHERE CRSP2B.ticker = IBES2.ticker AND CRSP2B.DATE >= IBES2.ANNDATS AND CRSP2B.L1_ANNDATS_ACT <= IBES2.ANNDATS;
QUIT;




PROC SORT DATA=IBES3;
BY ticker DATE ANALYS ANNDATS;
RUN;


PROC SORT NODUPKEY DATA=IBES3 OUT=IBES3B;
BY ticker DATE ANALYS ;
RUN;
PROC MEANS NOPRINT DATA=IBES3B;
BY ticker DATE;
VAR VALUE;
OUTPUT OUT=CONSENSUS MEAN=AVE_VALUE MEDIAN=MED_VALUE n=FORECASTS;
RUN;


DATA CONSENSUS;
SET CONSENSUS;
KEEP ticker DATE AVE_VALUE MED_VALUE FORECASTS;
RUN;

PROC SORT DATA=CONSENSUS;
BY ticker DATE;
RUN;
PROC SORT DATA=CRSP2;
BY ticker DATE;
RUN;
DATA CRSP3;
MERGE CRSP2 CONSENSUS;
BY ticker DATE;
RUN;



PROC SORT DATA=CRSP_IBES;
BY ticker DATE;
RUN;
PROC SORT DATA=CRSP3;
BY ticker DATE;
RUN;
DATA CRSP_IBES2;
MERGE CRSP_IBES CRSP3;
BY ticker DATE;


IF ACTUAL > MED_VALUE THEN FE2 =1;
IF ACTUAL <MED_VALUE THEN FE2 = -1;
IF ACTUAL = MED_VALUE THEN FE2 =0;
IF ACTUAL = . THEN FE2 =0;
IF AVE_VALUE = . THEN FE2 =0;


if fe2 =1 then beat =1; else beat =0;
if MED_VALUE = . then beat = .;


RUN;




*will now compute lagged values (e.g., momentum);



proc sort data=crsp_ibes2;
by permno date;
run;

%macro calcret;
data crsp_ibes3;
set crsp_ibes2;
  by permno;

/* Take natural logs of the firm return (ret) and decile size return (decret) */

   lfret=log(abn_ret+1);


/* Define arrays to create the lagged values */

    array lagfret[26] lagfret1-lagfret26;

/* This is the macro portion: a "do-loop" to create the 59 necessary lags */

    %do j=1 %to 26;
       lagfret&j=lag&j(lfret);


    %end;

/* The following statements set to missing lagged values that */
/* are reading the data corresponding to the previous PERMNO  */

    if first.permno then count=1;
    do i=count to 26;
      lagfret[i] = .;


    end;
    count +1 ;

/* Calculate the cumulative returns by adding the logs of the returns*/

  



	  mom5 =exp( sum(of  lagfret1-lagfret5)) -1;
  	mom6_26 =exp( sum(of  lagfret6-lagfret26)) -1;
		


/* Drop unnecessary variables */

    drop lagfret1-lagfret26   count i;

%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

/* Restore chronological order */
proc sort data=crsp_ibes3;
  by permno date;
run;


*now bring in news sentiment collected from bloomberg;



data sentiment;
set mylib.news_sentiment17_21;
keep date ticker news_sentiment;
run;
proc sort nodupkey data=sentiment;
by ticker descending date;
run;




%macro calcret;
data sentiment2;
set sentiment;
  by ticker;

/* Take natural logs of the firm return (ret) and decile size return (decret) */

   lfret=news_sentiment;


/* Define arrays to create the lagged values */

    array lagfret[63] lagfret1-lagfret63;


/* This is the macro portion: a "do-loop" to create the 59 necessary lags */

    %do j=1 %to 63;
       lagfret&j=lag&j(lfret);
    %end;

/* The following statements set to missing lagged values that */
/* are reading the data corresponding to the previous PERMNO  */

    if first.ticker then count=1;
    do i=count to 63;
      lagfret[i] = .;

    end;
    count +1 ;

/* Calculate the cumulative returns by adding the logs of the returns*/

  


  sentiment63= sum(of  lagfret1-lagfret63) ;
    sentiment21= sum(of  lagfret1-lagfret21) ;
	  sentiment5 =sum(of  lagfret1-lagfret5) ;


/* Drop unnecessary variables */

    drop l: count i;

%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

/* Restore chronological order */
proc sort data=sentiment2;
  by ticker date;
run;



proc sort data=crsp_ibes3;
by ticker date;
run;

data crsp_ibes4;
merge crsp_ibes3 sentiment2;
by ticker date;
if ret5 = . then delete;
run;




PROC SORT DATA=crsp_ibes4;
BY ticker DATE;
RUN;


%macro calcret;
data crsp_ibes4;
set crsp_ibes4;
  by ticker;

/* Take natural logs of the firm return (ret) and decile size return (decret) */



/* Define arrays to create the lagged values */

  
	array lagsent [26] lagsent1-lagsent26;
 

/* This is the macro portion: a "do-loop" to create the 59 necessary lags */

    %do j=1 %to 26;

		lagsent&j=lag&j(news_sentiment);


    %end;

/* The following statements set to missing lagged values that */
/* are reading the data corresponding to the previous PERMNO  */

    if first.ticker then count=1;
    do i=count to 26;

 		lagsent[i] = .;
	


    end;
    count +1 ;

/* Calculate the cumulative returns by adding the logs of the returns*/

  




		  lag_sent5 =sum(of  lagsent1-lagsent5);
  	lag_sent6_26 = sum(of  lagsent6-lagsent26) ;



/* Drop unnecessary variables */

    drop  lagsent1-lagsent26 count i;

%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

/* Restore chronological order */
proc sort data=crsp_ibes4;
  by ticker date;
run;


data crsp_ibes4;
set crsp_ibes4;

*if news is missing set equal to zero;
IF NEWS_SENTIMENT = . THEN NEWS_SENTIMENT =0;
IF lag_sent5 = . THEN lag_sent5 =0;
IF lag_sent6_26 = . THEN lag_sent6_26 =0;
IF sentiment5 = . THEN sentiment5 =0;
IF sentiment21 = . THEN sentiment21 =0;
IF sentiment63 = . THEN sentiment63 =0;


*compute two difference variables to be used in earnings tests;

date_dif = anndats_act - date;
date_dif2 = date - l1_anndats_act;

*create an alterative revision value that sets missing values to 50% (i.e., equal down and up);

percent_up21b = percent_up21;
percent_up5b = percent_up5;

if percent_up21 = . then percent_up21b = 0.50;
if percent_up5 = . then percent_up5b = 0.50;

ln_forecasts = log(1+forecasts);


run;






*WANT TO ADD IN SIZE IN THE PREVIOUS MONTH;


DATA CRSP_IBES4;
SET crsp_ibes4;
CUM_MONTH2 = MIN (CUM_MONTH, 37);
TRACKER = _N_;
RUN;

DATA SIZE;
SET MYLIB.CRSP17_21D;
YEAR = YEAR(DATE);
IF YEAR >=2017;
MONTH = MONTH(DATE);
*ADDING 1 SO THAT VALUE OF SIZE IS FROM PREVIOUS MONTH;
CUM_MONTH2 = (YEAR - 2018) * 12 + MONTH +1;
SIZE = ABS(PRC * SHROUT);
LN_SIZE = LOG (SIZE);
IF TICKER = '' THEN DELETE;
SHRCD2 = SHRCD;
KEEP  CUM_MONTH2  LN_SIZE SHRCD2 DATE TICKER ;
RUN;
PROC SORT DATA=SIZE;
BY TICKER CUM_MONTH2 DESCENDING DATE;
RUN;

PROC SORT NODUPKEY DATA=SIZE;
BY TICKER CUM_MONTH2 ;
RUN;


*taking book-value from financial ratios dataset provided by wrds;
DATA BM;
SET MYLIB.FINANCIAL_RATIOS;
YEAR = YEAR(public_date);
IF YEAR >=2017;
MONTH = MONTH(public_date);
*ADDING 1 SO THAT VALUE OF SIZE IS FROM PREVIOUS MONTH;
CUM_MONTH2 = (YEAR - 2018) * 12 + MONTH +1;
IF TICKER = '' THEN DELETE;
KEEP TICKER  CUM_MONTH2 bm;
RUN;
PROC SORT NODUPKEY DATA=BM;
BY TICKER CUM_MONTH2;
RUN;
DATA SIZE;
MERGE SIZE BM;
BY TICKER CUM_MONTH2;
RUN;





PROC SQL;
CREATE TABLE CRSP_IBES4B AS
SELECT *
FROM CRSP_IBES4, SIZE
WHERE CRSP_IBES4.TICKER = SIZE.TICKER AND CRSP_IBES4.CUM_MONTH2 = SIZE.CUM_MONTH2;
QUIT;
PROC SORT NODUPKEY DATA=CRSP_IBES4B;
BY TRACKER;
RUN;
PROC SORT DATA=CRSP_IBES4;
BY TRACKER;
RUN;
DATA CRSP_IBES4;
MERGE CRSP_IBES4 CRSP_IBES4B;
BY TRACKER;
RUN;


data crsp_ibes5;
set crsp_ibes4;

LN_BM = LOG(BM);
IF LN_BM = . THEN BM_MISSING =1; ELSE BM_MISSING =0;
IF BM_MISSING =1 THEN LN_BM =0;
IF LN_SIZE = . THEN SIZE_MISSING =1; ELSE SIZE_MISSING =0;
IF LN_SIZE = . THEN LN_SIZE =0;
IF RET5 >10 THEN RET5 =10;
IF RET21 >15 THEN RET21 =15;
if cum_month >36 then d2021 =1;
else d2021 =0;
net_dd_posts21 =  net_dd_posts * d2021;
run;



DATA MYLIB.DAILY_PANELQ2_2021C;
SET CRSP_IBES5;
RUN;




*will modify by making the following adjustments:

1) changing definition of post-GME
2) Adding SA data 
3) adding non-research post data
4) winsorizing Net DD (and other variables) at 5 and -5;

DATA INFO;
SET MYLIB.DAILY_PANELQ2_2021C;
IF 7<=CUM_MONTH<=42;
pre_gme = .;
post_gme = .;
day = day(date);
month = month(date);

if cum_month <37 then pre_gme =1;
if cum_month > 37 then post_gme =1;

if cum_month = 37 and day <13 then pre_gme =1;
if cum_month = 37 and day >13 then post_gme =1;

if cum_month = 37 and day =13 then delete;

if pre_gme = . then pre_gme =0;
if post_gme = . then post_gme =0;

if cum_month = 37 and 8<=day<=18 then event_flag5 =1;
else event_flag5 =0;

run;


*************** bring in non-research posts ******;





Data posts;
set mylib.NON_RESEARCH_POSTS;
hour = hour(created_utc);
minute = minute(created_utc);
if hour > 16 then next_day =1;
else next_day =0;
post_date = datepart(created_utc);

adj_date =  post_date + next_day;
weekday = weekday(adj_date);
tradedate = adj_date;
if weekday = 7 then tradedate = adj_date +2;
if weekday = 1 then tradedate = adj_date +1;

format post_date yymmdd10.;
format tradedate yymmdd10.;

year = year(tradedate);
month = month(tradedate);
day = day(tradedate);


cum_month = (year -2018)* 12 + month;

IF 7<=CUM_MONTH<=42;
comments  = num_comments * 1;
run;


proc sort data=posts;
by author flair definite_ticker tradedate descending   comments;
run;
proc means data=posts;
var comments;
run;

*delete duplicate posts by same author and date - 56503;
proc sort nodupkey data=posts out=posts2;
by author flair definite_ticker tradedate;
run;


*focus on subset of posts that are very clearly non-research related (N = 20,269);




data posts2;
set posts2;

if flair = 'Gain' then keep =1;
if flair = 'Loss' then keep =1;
if flair = 'Meme' then keep =1;
if flair = 'Shitpost' then keep =1;
if flair = 'News' then keep =1;

if keep =1;

keep author flair definite_ticker tradedate comments cum_month;
run;


proc sort data=posts2;
by definite_ticker tradedate;
run;
proc univariate noprint  data=posts2;
by definite_ticker tradedate;
var comments;
output out=non_research_posts n=non_research_posts;
run;




*require that firm can be matched to daily_panel_sample (CRSP - common stocks);


data daily_panelq2_2021;
set mylib.daily_panelq2_2021C;
keep ticker permno  DATE;
run;



*13,341 posts;




proc sql;
create table non_research_posts2 as
select *
from non_research_posts, daily_panelq2_2021
where non_research_posts.definite_ticker = daily_panelq2_2021.ticker and non_research_posts.tradedate =  daily_panelq2_2021.date;
quit;



***************************************************************************
*now bring in SA reports;



***************************************************************************;


proc sort data=mylib.seeking_alpha2b;
by ticker tradedate;
run;
proc univariate noprint data=mylib.seeking_alpha2b;
by ticker tradedate;
var buy;
output out=sa_net_buy sum = sa_buys N=sa_posts;
run;

data sa_net_buy2;
set sa_net_buy;
format date yymmdd9.;
sa_sells = sa_posts - sa_buys;
date = tradedate;
drop tradedate;
run;




*combine INFO, non_research_posts, AND SA DATA;
proc sort data=info;
by date ticker;
run;
proc sort data=sa_net_buy2;
by date ticker;
run;
proc sort data=non_research_posts2;
by date ticker;
run;


data info;
merge info non_research_posts2 sa_net_buy2;
by date ticker;
if sa_buys = . then sa_buys =0;
if sa_sells = . then sa_sells =0;
if sa_posts = . then sa_posts =0;

net_sa = sa_buys - sa_sells;
IF non_research_posts = . THEN NON_RESEARCH_POSTS =0;


NET_DD2 = NET_DD_POSTS;
IF NET_DD2 >6  THEN NET_DD2 =6;
IF . < NET_DD2 <-6 THEN NET_DD2 = -6;

NET_SA2 = net_sa;
IF NET_SA2 >5  THEN NET_SA2 =5;
IF . < NET_SA2 <-5 THEN NET_SA2 = -5;

NON_RESEARCH2 =  NON_RESEARCH_POSTS;
IF NON_RESEARCH2 >22 THEN NON_RESEARCH2 =22;

NET_DD2_POST =  NET_DD2 * POST_GME;
NET_SA2_POST = NET_SA2 * POST_GME;
NON_RESEARCH2_POST = NON_RESEARCH2 * POST_GME;

net_dd2_pre = NET_DD2 * (1-POST_GME);



if ret5 = . then delete;
DD_and_SA = NET_DD2 + NET_SA2;
DD_and_SA_POST = NET_DD2_POST + NET_SA2_POST;

DD_and_Non_Research = Net_DD2 + NON_RESEARCH2;
DD_and_Non_Research_post = NET_DD2_POST + NON_RESEARCH2_POST;
run;
data mylib.daily_panel_rfs;;
set info;
run;
